> library(tidyverse)
> library(ggplot2)
> library(cregg)
> library(cjoint)
> library(readstata13)
> library(data.table)
> library(qwraps2)
> library(survey)
> library(janitor)
> #plot theme
> plot_theme =  theme_bw()+ theme(text = element_text(size=10), 
+                                 panel.grid.major = element_blank(),
+                                 panel.grid.minor = element_blank(),
+                                 panel.grid.major.y = element_line(colour = "#CCCCCC"),
+                                 axis.line = element_line(colour = "black"),
+                                 strip.background=element_rect(colour=NA, fill="#DDDDDD"),
+                                 legend.title=element_blank(), 
+                                 legend.position="top",
+                                 axis.title=element_text(size=14,face="bold"), 
+                                 axis.text=element_text(size=12),
+                                 strip.text.x = element_text(size = 12),
+                                 legend.text=element_text(size=12), 
+                                 axis.text.x = element_text(angle = 45, hjust = 1, vjust=1)) 
> #plot features
> pd<-position_dodge(.2)
> pd_text<-position_dodge(1)
> 
> #number of runs for boot strap 
> runs = 3000
> 
> 
> ### Table A1
> ### Bovitz sample 
> bovitz_demos_df <- read.dta13("../final_w1w2.dta")
Warning message:
In read.dta13("../final_w1w2.dta") : 
   Factor codes of type double or float detected in variables

   educ, pid3_lean, pid3_lean_w2, mark_up_cat

   No labels have been assigned.
   Set option 'nonint.factors = TRUE' to assign labels anyway.

> 
> 
> #pid
> bovitz_demos_df <- bovitz_demos_df %>% mutate(party_id = case_when( 
+   q2_w1 == 1  ~ "Democratic", 
+   q2_w1 == 2 ~ "Republican",
+   q2_w1 == 3 ~ "Independent",
+   q2_w1 == 4 ~ "Other",
+   q2_w1 == 5 ~ "Don't know",
+   TRUE ~ NA_character_
+ )) 
> 
> 
> #income
> bovitz_demos_df <- bovitz_demos_df %>% mutate(income =  case_when( 
+   inc <= 2  ~ "Less than $30,000", 
+   inc > 2 & inc < 6 ~ "Between $30,000 and $59,000",
+   inc >= 6 & inc < 10 ~ "Between $60,000 and $120,000",
+   inc >= 10 ~ "More than $120,000",
+   inc == 5 ~ "Don't know",
+   TRUE ~ NA_character_), income = factor(income, levels=c("Less than $30,000" , "Between $30,000 and $59,000" , "Between $60,000 and $120,000" , "More than $120,000")))
> 
> #gender
> bovitz_demos_df <- bovitz_demos_df %>% mutate(gender_demo =  case_when( 
+   gender == 1  ~ "Male", 
+   gender == 2  ~ "Female",
+   gender == 3 ~ "Other",
+   is.na(gender) ~ "Other",
+   TRUE ~ NA_character_), gender_demo = factor(gender_demo, levels=c("Male" , "Female" , "Other")))
> 
> 
> 
> #edu
> bovitz_demos_df <- bovitz_demos_df %>% mutate(edu =  case_when( 
+   q22_w1 == 1 ~  "Less than high school", 
+   q22_w1 == 2 ~  "High school graduate", 
+   q22_w1 == 3 ~  "Some college", 
+   q22_w1 == 4 ~  "2 year degree", 
+   q22_w1 == 5 ~  "4 year degree", 
+   q22_w1 == 6 ~  "Post-grad", 
+   q22_w1 == 7 ~  "Post-grad", 
+   TRUE ~ NA_character_), edu = factor(edu, levels=c(
+     "Less than high school", "High school graduate", "Some college", "2 year degree", "4 year degree", "Post-grad" )))
> 
> 
> #age 
> bovitz_demos_df <- bovitz_demos_df %>% mutate(age = age+17, age_range = case_when(
+   age >= 18 & age <= 29 ~ "18--29",
+   age >= 30 & age <= 39 ~ "30--39",
+   age >= 40 & age <= 49 ~ "40--49",
+   age >= 50 & age <= 59 ~ "50--59",
+   age >= 60 & age <= 69 ~ "60--69",
+   age >= 70 ~  "70+", 
+   TRUE ~ NA_character_), age_range = factor(age_range, levels= c("18--29", "30--39", "40--49", "50--59", "60--69", "70+")))
> 
> 
> 
> #get demos for unique individuals in the main analysis
> bovitz_demo <- bovitz_demos_df %>% select(edu,  income, gender_demo, age_range, party_id, wave1_only, task_num, profile_chosen)  %>% filter(is.na(wave1_only) & task_num == 1 & profile_chosen == 1 )
> 
> 
> options(qwraps2_markup="markdown")
> bovitz_demos_summary<- list("Gender" =  
+                        list("Male"=  ~ n_perc(gender_demo == "Male", na_rm=TRUE),
+                             "Female" = ~ n_perc(gender_demo == "Female", na_rm=TRUE),
+                             "Other" = ~ n_perc(gender_demo == "Other", na_rm=TRUE)), 
+                      "Income" = 
+                        list("Less than $30,000"=  ~ n_perc(income == "Less than $30,000", na_rm=TRUE),
+                             "Between $30,000 and $59,000" = ~ n_perc(income == "Between $30,000 and $59,000", na_rm=TRUE),
+                             "Between $60,000 and $120,000" = ~ n_perc(income == "Between $60,000 and $120,000", na_rm=TRUE),
+                             "More than $120,000" = ~ n_perc(income == "More than $120,000", na_rm=TRUE)),
+                      "Age" = 
+                        list("18--29" =  ~ n_perc(age_range == "18--29", na_rm=TRUE), 
+                             "30--39" =  ~ n_perc(age_range == "30--39", na_rm=TRUE), 
+                             "40--49" =  ~ n_perc(age_range == "40--49", na_rm=TRUE), 
+                             "50--59" =  ~ n_perc(age_range == "50--59", na_rm=TRUE), 
+                             "60--69" =  ~ n_perc(age_range == "60--69", na_rm=TRUE), 
+                             "70+"    =  ~ n_perc(age_range == "70+", na_rm=TRUE)), 
+                      "Party ID" = 
+                        list("Democratic"=  ~ n_perc(party_id == "Democratic", na_rm=TRUE),
+                             "Republican"=  ~ n_perc(party_id == "Republican", na_rm=TRUE),
+                             "Independent" =  ~ n_perc(party_id == "Independent", na_rm=TRUE),
+                             "Other"=  ~ n_perc(party_id == "Other" | party_id == "Don't know" , na_rm=TRUE)),
+                      "Education" =
+                        list("Less than high school"=  ~ n_perc(edu == "Less than high school", na_rm=TRUE),
+                             "High school graduate"=  ~ n_perc(edu == "High school graduate", na_rm=TRUE),
+                             "Some college" =  ~ n_perc(edu == "Some college", na_rm=TRUE),
+                             "2 year degree"=  ~ n_perc(edu == "2 year degree", na_rm=TRUE), 
+                             "4 year degree"=  ~ n_perc(edu == "4 year degree", na_rm=TRUE), 
+                             "Post-grad"=  ~ n_perc(edu == "Post-grad", na_rm=TRUE)))
> 
> bovitz_demos_table <- summary_table(bovitz_demo, bovitz_demos_summary) 
> bovitz_demos_table


|                                          |bovitz_demo (N = 996) |
|:-----------------------------------------|:---------------------|
|**Gender**                                |&nbsp;&nbsp;          |
|&nbsp;&nbsp; Male                         |493 (49.50%)          |
|&nbsp;&nbsp; Female                       |498 (50.00%)          |
|&nbsp;&nbsp; Other                        |5 (0.50%)             |
|**Income**                                |&nbsp;&nbsp;          |
|&nbsp;&nbsp; Less than $30,000            |280 (28.11%)          |
|&nbsp;&nbsp; Between $30,000 and $59,000  |282 (28.31%)          |
|&nbsp;&nbsp; Between $60,000 and $120,000 |271 (27.21%)          |
|&nbsp;&nbsp; More than $120,000           |163 (16.37%)          |
|**Age**                                   |&nbsp;&nbsp;          |
|&nbsp;&nbsp; 18--29                       |190 (19.08%)          |
|&nbsp;&nbsp; 30--39                       |212 (21.29%)          |
|&nbsp;&nbsp; 40--49                       |194 (19.48%)          |
|&nbsp;&nbsp; 50--59                       |193 (19.38%)          |
|&nbsp;&nbsp; 60--69                       |143 (14.36%)          |
|&nbsp;&nbsp; 70+                          |64 (6.43%)            |
|**Party ID**                              |&nbsp;&nbsp;          |
|&nbsp;&nbsp; Democratic                   |439 (44.08%)          |
|&nbsp;&nbsp; Republican                   |241 (24.20%)          |
|&nbsp;&nbsp; Independent                  |268 (26.91%)          |
|&nbsp;&nbsp; Other                        |48 (4.82%)            |
|**Education**                             |&nbsp;&nbsp;          |
|&nbsp;&nbsp; Less than high school        |25 (2.51%)            |
|&nbsp;&nbsp; High school graduate         |210 (21.08%)          |
|&nbsp;&nbsp; Some college                 |226 (22.69%)          |
|&nbsp;&nbsp; 2 year degree                |113 (11.35%)          |
|&nbsp;&nbsp; 4 year degree                |286 (28.71%)          |
|&nbsp;&nbsp; Post-grad                    |136 (13.65%)          |
> 
> ## lucid 
> 
> lucid_demos_df <- read.dta13("Figure A6/lucid_formatted.dta")  
Warning message:
In read.dta13("Figure A6/lucid_formatted.dta") : 
   Factor codes of type double or float detected in variables

   pid3_lean

   No labels have been assigned.
   Set option 'nonint.factors = TRUE' to assign labels anyway.

> 
> 
> lucid_demos_df <- lucid_demos_df %>% mutate(party_id = factor(Q2, levels=c("Democrat", "Republican", "Independent", "Other", "Don't know")))
> 
> lucid_demos_df <- lucid_demos_df %>% mutate(gender = factor(Q52, levels=c("Male" , "Female" , "Other")))
> 
> lucid_demos_df <- lucid_demos_df %>% mutate(edu =  case_when( 
+   Q53 == "Professional degree" ~  "Post-grad", 
+   Q53 == "Doctorate" ~  "Post-grad", 
+   TRUE ~ Q53), edu = factor(edu, levels=c(
+     "Less than high school", "High school graduate", "Some college", "2 year degree", "4 year degree", "Post-grad" )))
> 
> 
> lucid_demos_df <- lucid_demos_df %>% mutate(age_range = case_when(
+   Q49 >= 18 & Q49 <= 29 ~ "18--29",
+   Q49 >= 30 & Q49 <= 39 ~ "30--39",
+   Q49 >= 40 & Q49 <= 49 ~ "40--49",
+   Q49 >= 50 & Q49 <= 59 ~ "50--59",
+   Q49 >= 60 & Q49 <= 69 ~ "60--69",
+   Q49 >= 70 ~  "70+", 
+   TRUE ~ NA_character_), age_range = factor(age_range, levels= c("18--29", "30--39", "40--49", "50--59", "60--69", "70+")))
> 
> 
> lucid_demos_df <- lucid_demos_df %>% mutate(income =  case_when( 
+   hhi == -3105 ~ NA_character_,
+   hhi >= 1 & hhi <= 4  ~ "Less than $30,000", 
+   hhi >= 5 & hhi <= 10 ~ "Between $30,000 and $59,000",
+   hhi >= 11 & hhi <= 19 ~ "Between $60,000 and $125,000",
+   hhi >= 20 ~ "More than $125,000",
+   TRUE ~ NA_character_), income = factor(income, levels=c("Less than $30,000" , "Between $30,000 and $59,000" , "Between $60,000 and $125,000" , "More than $125,000")))
> 
> 
> #compelte cases
> lucid_demo <- lucid_demos_df %>% select(edu, income, gender, age_range, party_id, task_num, profile_chosen) %>% filter(task_num ==1 & profile_chosen == 1) %>% na.omit()
> 
> 
> library(qwraps2)
> options(qwraps2_markup="markdown")
> lucid_demos_summary<- list("Gender" =  
+                              list("Male"=  ~ n_perc(gender == "Male"),
+                                   "Female" = ~ n_perc(gender == "Female"),
+                                   "Other" = ~ n_perc(gender == "Other")), 
+                            "Income" = 
+                              #note the mismatch on 120 vs 125. see footnote in appendix
+                              list("Less than $30,000"=  ~ n_perc(income == "Less than $30,000", na_rm=TRUE),
+                                   "Between $30,000 and $59,000" = ~ n_perc(income == "Between $30,000 and $59,000", na_rm=TRUE),
+                                   "Between $60,000 and $120,000" = ~ n_perc(income == "Between $60,000 and $125,000", na_rm=TRUE),
+                                   "More than $120,000" = ~ n_perc(income == "More than $125,000", na_rm=TRUE)),
+                            "Age" = 
+                              list("18--29" =  ~ n_perc(age_range == "18--29", na_rm=TRUE), 
+                                   "30--39" =  ~ n_perc(age_range == "30--39", na_rm=TRUE), 
+                                   "40--49" =  ~ n_perc(age_range == "40--49", na_rm=TRUE), 
+                                   "50--59" =  ~ n_perc(age_range == "50--59", na_rm=TRUE), 
+                                   "60--69" =  ~ n_perc(age_range == "60--69", na_rm=TRUE), 
+                                   "70+"    =  ~ n_perc(age_range == "70+", na_rm=TRUE)), 
+                            "Party ID" = 
+                              list("Democratic"=  ~ n_perc(party_id == "Democrat", na_rm=TRUE),
+                                   "Republican"=  ~ n_perc(party_id == "Republican", na_rm=TRUE),
+                                   "Independent" =  ~ n_perc(party_id == "Independent", na_rm=TRUE),
+                                   "Other"=  ~ n_perc(party_id == "Other", na_rm=TRUE)),
+                            "Education" =
+                              list("Less than high school"=  ~ n_perc(edu == "Less than high school", na_rm=TRUE),
+                                   "High school graduate"=  ~ n_perc(edu == "High school graduate", na_rm=TRUE),
+                                   "Some college" =  ~ n_perc(edu == "Some college", na_rm=TRUE),
+                                   "2 year degree"=  ~ n_perc(edu == "2 year degree", na_rm=TRUE), 
+                                   "4 year degree"=  ~ n_perc(edu == "4 year degree", na_rm=TRUE), 
+                                   "Post-grad"=  ~ n_perc(edu == "Post-grad", na_rm=TRUE)))
> 
> lucid_demos_table <- summary_table(lucid_demo, lucid_demos_summary)
> lucid_demos_table


|                                          |lucid_demo (N = 1,025) |
|:-----------------------------------------|:----------------------|
|**Gender**                                |&nbsp;&nbsp;           |
|&nbsp;&nbsp; Male                         |511 (49.85%)           |
|&nbsp;&nbsp; Female                       |512 (49.95%)           |
|&nbsp;&nbsp; Other                        |2 (0.20%)              |
|**Income**                                |&nbsp;&nbsp;           |
|&nbsp;&nbsp; Less than $30,000            |321 (31.32%)           |
|&nbsp;&nbsp; Between $30,000 and $59,000  |271 (26.44%)           |
|&nbsp;&nbsp; Between $60,000 and $120,000 |267 (26.05%)           |
|&nbsp;&nbsp; More than $120,000           |166 (16.20%)           |
|**Age**                                   |&nbsp;&nbsp;           |
|&nbsp;&nbsp; 18--29                       |200 (19.51%)           |
|&nbsp;&nbsp; 30--39                       |226 (22.05%)           |
|&nbsp;&nbsp; 40--49                       |178 (17.37%)           |
|&nbsp;&nbsp; 50--59                       |145 (14.15%)           |
|&nbsp;&nbsp; 60--69                       |170 (16.59%)           |
|&nbsp;&nbsp; 70+                          |106 (10.34%)           |
|**Party ID**                              |&nbsp;&nbsp;           |
|&nbsp;&nbsp; Democratic                   |489 (47.71%)           |
|&nbsp;&nbsp; Republican                   |252 (24.59%)           |
|&nbsp;&nbsp; Independent                  |238 (23.22%)           |
|&nbsp;&nbsp; Other                        |21 (2.05%)             |
|**Education**                             |&nbsp;&nbsp;           |
|&nbsp;&nbsp; Less than high school        |26 (2.54%)             |
|&nbsp;&nbsp; High school graduate         |228 (22.24%)           |
|&nbsp;&nbsp; Some college                 |219 (21.37%)           |
|&nbsp;&nbsp; 2 year degree                |111 (10.83%)           |
|&nbsp;&nbsp; 4 year degree                |244 (23.80%)           |
|&nbsp;&nbsp; Post-grad                    |197 (19.22%)           |
> 
> 
> ##cces -- can't use summary table becuase it doesn't do weights. 
> cces_demos_df <- read.dta13("CCES/cces.dta")
> #pid
> cces_demos_df <- cces_demos_df %>% mutate(pid_demos = case_when( 
+   pid3_leaner == 1  ~ "Democrat", 
+   pid3_leaner == 2 ~ "Republican",
+   pid3_leaner == 3 ~ "Independent",
+   pid3_leaner == 8 ~ "Other",
+   TRUE ~ NA_character_)) %>%  mutate(pid_demos = factor(pid_demos, levels=c("Democrat", "Republican", "Independent", "Other")))
> 
> #income 
> cces_demos_df <- cces_demos_df %>% 
+   mutate(income_range = factor(income_range, levels=c("Less than $30,000", "Between $30,000 and $59,999", "Between $60,000 and $120,000", "$120,000 or more", "Prefer not to say")))
> 
> #age
> cces_demos_df <- cces_demos_df %>% 
+   mutate(age_range = factor(age_range, levels= c("18--29", "30--39", "40--49", "50--59", "60--69", "70+")))
> 
> #gender
> cces_demos_df <- cces_demos_df %>% mutate(gender_demos =  case_when( 
+   gender == 1  ~ "Male", 
+   gender == 2  ~ "Female",
+   TRUE ~ NA_character_), gender_demos = factor(gender_demos, levels=c("Male" , "Female", "Other")))
> 
> #edu
> cces_demos_df <- cces_demos_df %>% mutate(edu =  case_when( 
+   educ == 1 ~  "Less than high school", 
+   educ == 2 ~  "High school graduate", 
+   educ == 3 ~  "Some college", 
+   educ == 4 ~  "2 year degree", 
+   educ == 5 ~  "4 year degree", 
+   educ == 6 ~  "Post-grad", 
+   TRUE ~ NA_character_), edu = factor(edu, levels=c(
+     "Less than high school", "High school graduate", "Some college", "2 year degree", "4 year degree", "Post-grad" )))
> 
> #sample weights
> cces_design <- svydesign(ids=~0, weights=~weight, data=cces_demos_df)
> 
> #gender
> cces_gender <- svytable(~gender_demos, cces_design) %>% 
+         as_tibble() %>% mutate(variable = "Gender") %>%
+         rename(level = gender_demos)
> #partyid
> cces_pid <- svytable(~pid_demos, cces_design) %>% 
+   as_tibble() %>% mutate(variable = "Party ID") %>%
+   rename(level = pid_demos) 
> 
> #income 
> cces_inc <- svytable(~income_range, cces_design) %>% 
+   as_tibble() %>% 
+   mutate(variable = "Income")  %>%
+   rename(level = income_range) %>% 
+   filter(level != "Prefer not to say")
>   
> 
> #education 
> cces_edu <- svytable(~edu, cces_design)  %>% 
+   as_tibble() %>% mutate(variable = "Education") %>%
+   rename(level = edu)
> 
> #age
> cces_age <- svytable(~age_range, cces_design) %>% 
+   as_tibble() %>% mutate(variable = "Age") %>%
+   rename(level = age_range)
> 
> 
> cces_bind <- rbind(cces_gender,cces_inc, cces_age, cces_pid, cces_edu) %>%
+   group_by(variable) %>%
+   mutate(total = sum(n), prop = round((n/total*100), 2)) %>%
+   mutate(prop = paste0(prop,"%"))
> cces_bind
# A tibble: 23 × 5
# Groups:   variable [5]
   level                            n variable  total prop  
   <chr>                        <dbl> <chr>     <dbl> <chr> 
 1 Male                         8722. Gender   18000. 48.46%
 2 Female                       9278. Gender   18000. 51.54%
 3 Other                           0  Gender   18000. 0%    
 4 Less than $30,000            4692. Income   15800. 29.7% 
 5 Between $30,000 and $59,999  4663. Income   15800. 29.51%
 6 Between $60,000 and $120,000 4608. Income   15800. 29.17%
 7 $120,000 or more             1837. Income   15800. 11.63%
 8 18--29                       3799. Age      18000. 21.11%
 9 30--39                       2937. Age      18000. 16.32%
10 40--49                       2333. Age      18000. 12.96%
# … with 13 more rows
> 
> #grab the summary table object from lucid and replace the stats with our manually created ones. 
> cces_demos_table <- lucid_demos_table
> cces_demos_table[1:23] <- cces_bind$prop
> dimnames(cces_demos_table)[[2]] <- "2019 Cooperative Congressional Election Study (N=18,000, weighted) Comparison"
> cces_demos_table


|                                          |2019 Cooperative Congressional Election Study (N=18,000, weighted) Comparison |
|:-----------------------------------------|:-----------------------------------------------------------------------------|
|**Gender**                                |&nbsp;&nbsp;                                                                  |
|&nbsp;&nbsp; Male                         |48.46%                                                                        |
|&nbsp;&nbsp; Female                       |51.54%                                                                        |
|&nbsp;&nbsp; Other                        |0%                                                                            |
|**Income**                                |&nbsp;&nbsp;                                                                  |
|&nbsp;&nbsp; Less than $30,000            |29.7%                                                                         |
|&nbsp;&nbsp; Between $30,000 and $59,000  |29.51%                                                                        |
|&nbsp;&nbsp; Between $60,000 and $120,000 |29.17%                                                                        |
|&nbsp;&nbsp; More than $120,000           |11.63%                                                                        |
|**Age**                                   |&nbsp;&nbsp;                                                                  |
|&nbsp;&nbsp; 18--29                       |21.11%                                                                        |
|&nbsp;&nbsp; 30--39                       |16.32%                                                                        |
|&nbsp;&nbsp; 40--49                       |12.96%                                                                        |
|&nbsp;&nbsp; 50--59                       |16.89%                                                                        |
|&nbsp;&nbsp; 60--69                       |19.46%                                                                        |
|&nbsp;&nbsp; 70+                          |13.26%                                                                        |
|**Party ID**                              |&nbsp;&nbsp;                                                                  |
|&nbsp;&nbsp; Democratic                   |43.6%                                                                         |
|&nbsp;&nbsp; Republican                   |35.87%                                                                        |
|&nbsp;&nbsp; Independent                  |16.08%                                                                        |
|&nbsp;&nbsp; Other                        |4.44%                                                                         |
|**Education**                             |&nbsp;&nbsp;                                                                  |
|&nbsp;&nbsp; Less than high school        |9.14%                                                                         |
|&nbsp;&nbsp; High school graduate         |28.6%                                                                         |
|&nbsp;&nbsp; Some college                 |20.72%                                                                        |
|&nbsp;&nbsp; 2 year degree                |11.06%                                                                        |
|&nbsp;&nbsp; 4 year degree                |19.24%                                                                        |
|&nbsp;&nbsp; Post-grad                    |11.23%                                                                        |
> 
> 
> demographics <- cbind(bovitz_demos_table, lucid_demos_table, cces_demos_table)
> 
> 
> #Output table A1
> TableA1File<-file("TableA1.txt")
> writeLines(qable(demographics), TableA1File)
> close(TableA1File)
> 
> 
> 
> ### Table A2
> ###party ID
> bovitz_demos_df <- bovitz_demos_df %>% 
+   mutate(pid3_lean_factor = case_when(
+   pid3_lean == 1 ~ "Democrat", 
+   pid3_lean == 2 ~ "Independent",
+   pid3_lean == 3 ~ "Republican"))
> 
> #wave 1
> w1pid <- bovitz_demos_df %>% 
+   filter((task_num == 1 & profile_chosen == 1) | wave1_only == 1 ) %>% 
+   tabyl(pid3_lean_factor) %>%
+   adorn_totals() %>%
+   adorn_pct_formatting() %>% as_tibble() %>% mutate(wave = "Wave 1") %>% 
+   rename(demo = pid3_lean_factor)
> 
> 
> ##wave 2
> w2pid <- bovitz_demos_df %>% 
+   filter(task_num == 1 & profile_chosen == 1) %>% 
+   tabyl(pid3_lean_factor) %>%
+   adorn_totals() %>%
+   adorn_pct_formatting() %>% as_tibble() %>% mutate(wave = "Wave 2") %>% 
+   rename(demo = pid3_lean_factor)
>    
> 
> ## college or more 
> bovitz_demos_df <-  bovitz_demos_df %>% mutate(college_more = case_when(edu == "4 year degree" | edu == "Post-grad" ~ "College or more", 
+                                                    !is.na(edu) ~ "Less than college"))
> w1edu <- bovitz_demos_df %>% 
+   filter((task_num == 1 & profile_chosen == 1) | wave1_only == 1 ) %>%
+   tabyl(college_more) %>%
+   adorn_totals() %>%
+   adorn_pct_formatting() %>% as_tibble() %>% 
+   mutate(wave = "Wave 1") %>% 
+   rename(demo = college_more)
> 
> w2edu <- bovitz_demos_df %>% 
+   filter((task_num == 1 & profile_chosen == 1)) %>%
+   tabyl(college_more) %>%
+   adorn_totals() %>%
+   adorn_pct_formatting() %>% as_tibble() %>% 
+   mutate(wave = "Wave 2")  %>% 
+   rename(demo = college_more)
> 
> #white 
> bovitz_demos_df <-  bovitz_demos_df %>% mutate(white = case_when(race == 1 ~ "White", 
+                                                                         !is.na(race) ~ "Non-white"))
> 
> w1race <- bovitz_demos_df %>% 
+   filter((task_num == 1 & profile_chosen == 1) | wave1_only == 1 ) %>%
+   tabyl(white) %>%
+   adorn_totals() %>%
+   adorn_pct_formatting() %>% 
+   as_tibble() %>% 
+   mutate(wave ="Wave 1")  %>% 
+   rename(demo = white)
> 
> w2race <- bovitz_demos_df %>% 
+   filter((task_num == 1 & profile_chosen == 1) ) %>%
+   tabyl(white) %>%
+   adorn_totals() %>%
+   adorn_pct_formatting() %>% 
+   as_tibble() %>% 
+   mutate(wave ="Wave 2") %>%
+   rename(demo = white)
> 
> #gender 
> w1gender <- bovitz_demos_df %>% 
+   filter((task_num == 1 & profile_chosen == 1) | wave1_only == 1 ) %>%
+   tabyl(gender_demo) %>%
+   adorn_totals() %>%
+   adorn_pct_formatting() %>% 
+   as_tibble() %>% 
+   mutate(wave ="Wave 1") %>%
+   rename(demo = gender_demo)
> 
> w2gender <- bovitz_demos_df %>% 
+   filter((task_num == 1 & profile_chosen == 1) ) %>%
+   tabyl(gender_demo) %>%
+   adorn_totals() %>%
+   adorn_pct_formatting() %>% 
+   as_tibble() %>% 
+   mutate(wave ="Wave 2")  %>%
+   rename(demo = gender_demo)
> 
> 
> ##ethno
> w1ethno <- bovitz_demos_df %>% 
+   filter((task_num == 1 & profile_chosen == 1) | wave1_only == 1 ) %>%
+   summarize(eth_mean = mean(ethnocentrism), eth_sd = sd(ethnocentrism), n = n())  %>%
+   mutate(wave = "Wave 2", demo = "Ethnocentrism", percent = paste0("Mean=", round(eth_mean, 3), ", SD=", round(eth_sd, 3))) %>%
+   select(demo, percent, wave, n)
> 
> w2ethno <- bovitz_demos_df %>% 
+   filter((task_num == 1 & profile_chosen == 1)) %>%
+   summarize(eth_mean = mean(ethnocentrism), eth_sd = sd(ethnocentrism), n = n()) %>%
+   mutate(wave = "Wave 1", demo = "Ethnocentrism", percent = paste0("Mean=", round(eth_mean, 3), ", SD=", round(eth_sd, 3))) %>%
+   select(demo, percent, wave, n) 
> 
>  
> 
> nw2<- bovitz_demos_df %>% 
+   filter((task_num == 1 & profile_chosen == 1) ) %>%
+   nrow() 
> 
> nw1 <- bovitz_demos_df %>% 
+   filter((task_num == 1 & profile_chosen == 1) | wave1_only == 1 ) %>%
+   nrow()
> 
> 
> panel_attrition <- rbind(w1pid, w2pid, w1edu, w2edu, w1gender, w2gender, w1race, w2race, w1ethno, w2ethno ) %>% 
+   filter(demo == "College or more" |
+            demo == "Female" |
+            demo == "White" |
+            demo == "Democrat"|  demo == "Republican"|  demo == "Independent"| 
+            demo == "Ethnocentrism") %>% select(demo, percent, wave) %>% 
+    mutate(wave = case_when(wave == "Wave 1" ~ paste0(wave, " n=", nw1), 
+                            wave == "Wave 2"  ~ paste0(wave, " n=", nw2))) %>% 
+    pivot_wider(names_from = wave, values_from = percent)
> 
> panel_attrition
# A tibble: 7 × 3
  demo            `Wave 1 n=1539`      `Wave 2 n=996`      
  <chr>           <chr>                <chr>               
1 Democrat        52.7%                54.6%               
2 Independent     14.3%                12.9%               
3 Republican      33.0%                32.5%               
4 College or more 38.2%                42.4%               
5 Female          50.7%                50.0%               
6 White           66.1%                66.2%               
7 Ethnocentrism   Mean=0.522, SD=0.192 Mean=0.525, SD=0.191
> 
> #Output table A2
> TableA2File<-file("TableA2.txt")
> writeLines(qable(panel_attrition), TableA2File)
> close(TableA2File)
> 
> 
> 
> 
> results_data <- read.dta13("../final_w1w2.dta")
Warning message:
In read.dta13("../final_w1w2.dta") : 
   Factor codes of type double or float detected in variables

   educ, pid3_lean, pid3_lean_w2, mark_up_cat

   No labels have been assigned.
   Set option 'nonint.factors = TRUE' to assign labels anyway.

>   
> 
> #format
> results_data<- results_data %>% 
+   filter(is.na(wave1_only)) %>%
+   mutate(country = factor(country, levels=c("United States", "Germany", "A country outside the United States", "China"))) %>%
+   mutate(mark_up_cat = case_when(
+     mark_up == "1" ~ "100", 
+     mark_up == "0.25" ~ "25", 
+     mark_up == "0.5" ~ "50",
+     mark_up == "0" ~ "0")) %>% 
+   mutate(mark_up_cat =factor(mark_up_cat, levels=c("0", "25", "50", "100")))
> 
> 
> #bin our ethnocentrism measure 
> results_data <- results_data %>%
+   mutate(ethno_factor = case_when(
+     ethnocentrism <= .375 ~ "Low ethnocentrism", 
+     ethnocentrism > .375 & ethnocentrism < .625  ~ "Medium ethnocentrism", 
+     ethnocentrism >= .625  ~ "High ethnocentrism", 
+   )) %>% mutate(ethno_factor = as.factor(ethno_factor))
> 
> 
> #### Figure A2
> #### MM by product
> 
> c_design <- profile_chosen ~ mark_up_cat + rating_cat + country 
> mm_by_product <- cj(results_data, c_design, id = ~id, estimate = "mm", by = ~product_cat)
There were 36 warnings (use warnings() to see them)
> mm_by_product <- mm_by_product %>% mutate(feature_label = case_when(
+   feature == "mark_up_cat" ~ "Mark up over base price (%)", 
+   feature == "rating_cat"  ~ "Rating by past customers",
+   feature == "country"     ~ "Country of origin")) 
> 
> marginal_means_plot_by_product <- ggplot(mm_by_product, aes(y=estimate, x=level, color=product_cat,  group=product_cat)) +
+   facet_grid(feature_label~., scales="free") + 
+   coord_flip() + plot_theme + ylab("Marginal mean") + xlab("") + 
+   geom_point(position=position_dodge(width=.75)) + 
+   geom_errorbar(aes(ymin= lower, ymax= upper), width=0, position=position_dodge(width=.75)) + 
+   labs(col="Product") + theme(legend.position="right")
> marginal_means_plot_by_product
> ggsave("FigureA2.png",marginal_means_plot_by_product, dpi=300, width=8, height=9)
> 
> #### Figure A3
> #### MM by task number 
> 
> results_data <- results_data %>% mutate(task_num = as.factor(task_num))
> mm_by <- cj(results_data, c_design, id = ~id, estimate = "mm", by = ~ task_num)
There were 30 warnings (use warnings() to see them)
> mm_by <- mm_by %>% mutate(feature_label = case_when(
+   feature == "mark_up_cat" ~ "Mark up over base price (%)", 
+   feature == "rating_cat"  ~ "Rating by past customers",
+   feature == "country"     ~ "Country of origin")) %>%
+   mutate(task_num = factor(task_num, levels=c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10")))
> marginal_means_plot_by_task <- ggplot(mm_by, aes(y=estimate, x=level, color=task_num,  group=task_num)) +
+   facet_grid(feature_label~., scales="free") + 
+   coord_flip() + plot_theme + ylab("Marginal mean") + xlab("") + 
+   geom_point(position=position_dodge(width=.75)) + 
+   #geom_text(aes(label=round(estimate, digits = 2)),size = 2, nudge_x=.25) +
+   geom_errorbar(aes(ymin= lower, ymax= upper), size=1, width=0, position=position_dodge(width=.75)) + 
+   labs(col="Task number") + theme(legend.position="right")
> marginal_means_plot_by_task
> ggsave("FigureA3.png",marginal_means_plot_by_task, dpi=300, width=8, height=9)
> 
> 
> #### Figure A4 
> #### AMCE by product category 
> 
> 
> 
> ##### what is main effect of treatments? #####
> feature_names <- c("Mark up over base price (%)" , "Rating by past customers", "Country of origin" )
> 
> main_effects <- cj(results_data, c_design, id = ~task_num)
Warning message:
In logLik.svyglm(x) : svyglm not fitted by maximum likelihood.
> main_effects <- main_effects %>% mutate(feature_label = case_when(
+   feature == "mark_up_cat" ~ feature_names[1], 
+   feature == "rating_cat"  ~ feature_names[2],
+   feature == "country"     ~ feature_names[3])) %>%
+   mutate(feature_label = factor(feature_label, levels=rev(feature_names)))
> 
> #### appliances  ####
> appliances <- c("Washing Machine", "Toaster", "Microwave")
> main_effects_appliances <- cj(filter(results_data, product %in% appliances), c_design, id = ~task_num)
Warning message:
In logLik.svyglm(x) : svyglm not fitted by maximum likelihood.
> main_effects_appliances <- main_effects_appliances %>% mutate(feature_label = case_when(
+   feature == "mark_up_cat" ~ feature_names[1], 
+   feature == "rating_cat"  ~ feature_names[2],
+   feature == "country"     ~ feature_names[3])) %>%
+   mutate(feature_label = factor(feature_label, levels=rev(feature_names)))
> 
> #### food ####
> food <- c("1 pound of butter", "1 pound of cheese", "1 pound of coffee")
> main_effects_food <- cj(filter(results_data, product %in% food), c_design, id = ~task_num)
Warning message:
In logLik.svyglm(x) : svyglm not fitted by maximum likelihood.
> main_effects_food <- main_effects_food %>% mutate(feature_label = case_when(
+   feature == "mark_up_cat" ~ feature_names[1], 
+   feature == "rating_cat"  ~ feature_names[2],
+   feature == "country"     ~ feature_names[3])) %>%
+   mutate(feature_label = factor(feature_label, levels=rev(feature_names)))
> 
> 
> #### Household goods #### 
> household <- c("Roll of black duct tape", "4 rolls of paper towels", "96 oz laundry detergent", "12-inch non-stick skillet", "Small bottle of super glue", "Cell phone screen protector")
> 
> main_effects_household<- cj(filter(results_data, product %in% household), c_design, id = ~task_num)
Warning message:
In logLik.svyglm(x) : svyglm not fitted by maximum likelihood.
> main_effects_household <- main_effects_household %>% mutate(feature_label = case_when(
+   feature == "mark_up_cat" ~ feature_names[1], 
+   feature == "rating_cat"  ~ feature_names[2],
+   feature == "country"     ~ feature_names[3])) %>%
+   mutate(feature_label = factor(feature_label, levels=rev(feature_names)))
> 
> ## label 
> main_effects_household$products <- "Household goods"
> main_effects_food$products <- "Food"
> main_effects_appliances$products <- "Appliances"
> main_effects$products <- "All products"
> 
> product_results <- bind_rows(main_effects_household, main_effects_food, main_effects_appliances, main_effects)
> 
> pd_combined<-position_dodge(.75)
> products_plot <- ggplot(product_results, aes(y=estimate, x=level, group=products)) + geom_hline(yintercept = 0, color="#A0A0A0" ) +
+   facet_grid(.~feature_label, scales="free") + 
+   geom_errorbar(aes(ymin=lower, ymax=upper), width=0, position=pd_combined)+
+   plot_theme + ylab("AMCE relative to base attribute level") + xlab("") + 
+   geom_point(aes(fill=products, shape=products),color="black",size=4,position=pd_combined) +
+   scale_shape_manual(values=c(21,22, 23, 24))
> 
> products_plot
> ggsave("FigureA4.png",products_plot, dpi=300, width=12, height=8)
> 
> 
> 
> 
> 
> ######
> # Figure A5
> ######
> 
> # Center and Right Panel - AMCE and MTWP across whole sample - takes a long time to run.
> # Note that aggregated markup estimates are presented in Figure A5 in the appendix. 
> 
> data_for_cjoint <- results_data %>%  dplyr::select(profile_chosen, mark_up_cat, rating_cat, country, id, task_num, product_cat, ethno_factor) %>%
+   drop_na() %>%
+   mutate(country = relevel(country, ref="United States")) %>%
+   mutate(rating_cat = relevel(rating_cat, ref="5 out of 5 stars"))
> 
> results_vector = list()
> estimates_results = list()
> resp_ids <- unique(data_for_cjoint$id)
> set.seed(1234)
> 
> 
> for (run in 1:runs) {
+   # sample with replacement from a vector as long as our dataset. 
+   # we want a sample that matches our sample size. will have repeated observations. 
+   # create the new data set
+   samp <- sample(x = resp_ids, size = length(resp_ids), replace = TRUE)
+   
+   # this is *much* more efficient in data.table
+   bootSample <- as.data.table(data_for_cjoint)
+   setkey(bootSample, "id")
+   bootSample <- bootSample[J(samp), allow.cartesian = TRUE]
+   
+   #store complete results
+   results_vector[[run]] <- cjoint::amce(profile_chosen ~ mark_up_cat + rating_cat + country, data=bootSample, respondent.id = "id")
+   
+   #store estimates 
+   country <-  as.tibble(t(results_vector[[run]]$estimates$country), rownames="est")
+   markup <- as.tibble(t(results_vector[[run]]$estimates$markupcat), rownames="est")
+   #store the estimates
+   estimates_results[[run]] <- bind_rows(country, markup) %>% mutate(run = run)
+   #clear memory 
+   rm(country, markup)
+ }
Warning message:
`as.tibble()` was deprecated in tibble 2.0.0.
Please use `as_tibble()` instead.
The signature and semantics have changed, see `?as_tibble`.
This warning is displayed once every 8 hours.
Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated. 
> 
> # combine results from the bootstrap 
> full_sample_results <- bind_rows(estimates_results) 
> # clean out 
> rm(estimates_results, results_vector)
> 
> # Disaggregated by markup 
> # get the markup coeffs to joint back into the dataset
> full_sample_markups <- full_sample_results %>% filter(grepl('markup', est)) %>% 
+   select(est, AMCE, run) %>%
+   rename(markup_est = AMCE, markup = est)
> 
> # join markup coefficients and calculate MWTP
> full_sample_results_summary_disag <-  full_sample_results %>% 
+   filter(grepl('country', est)) %>%  
+   left_join(full_sample_markups, by=c("run")) %>%
+   mutate(markup_num = str_replace(markup, "markupcat", "")) %>%
+   mutate(mwtp = AMCE/(markup_est/as.numeric(markup_num))) %>%
+   group_by(est, markup_num) %>%
+   summarise(mean_amce = mean(AMCE), 
+             lower_amce =quantile(AMCE, probs=c(.025)), 
+             upper_amce = quantile(AMCE, probs=c(.975)), 
+             mean_mwtp = mean(mwtp), 
+             lower_mwtp =quantile(mwtp, probs=c(.025)), 
+             upper_mwtp = quantile(mwtp, probs=c(.975))) %>%
+   mutate(ethno_factor = "Full sample") %>%
+   rename(main = est)
`summarise()` has grouped output by 'est'. You can override using the `.groups` argument.
> 
> # grab MTWP
> mwtp_full_sample_results_disag <- full_sample_results_summary_disag %>% 
+   select(main, mean_mwtp, lower_mwtp, upper_mwtp, ethno_factor, markup_num ) %>%
+   rename(mean = mean_mwtp, upper = upper_mwtp, lower=lower_mwtp) %>% mutate(analysis = "MWTP")
> 
> ###############################
> # Estimates by level of ethnocentrism 
> ###############################
> 
> results_vector = list()
> estimates_results = list()
> resp_ids <- unique(data_for_cjoint$id)
> set.seed(1234)
> 
> for (run in 1:runs) {
+   # sample with replacement from a vector as long as our dataset. 
+   # we want a sample that matches our sample size. will have repeated observations. 
+   # create the new data set
+   samp <- sample(x = resp_ids, size = length(resp_ids), replace = TRUE)
+   
+   #sample
+   bootSample <- as.data.table(data_for_cjoint)
+   setkey(bootSample, "id")
+   bootSample <- bootSample[J(samp), allow.cartesian = TRUE]
+   
+   #store complete results
+   results_vector[[run]] <- cjoint::amce(profile_chosen ~ mark_up_cat + rating_cat + country*ethno_factor, data=bootSample,  respondent.varying = "ethno_factor", respondent.id = "id")
+   
+   #store estimates 
+   country_ethno <-  as.tibble(t(results_vector[[run]]$cond.estimates$`country:ethnofactor`), rownames="est")
+   country <-  as.tibble(t(results_vector[[run]]$cond.estimates$country), rownames="est")
+   ethno <- as.tibble(t(results_vector[[run]]$cond.estimates$ethnofactor), rownames="est")
+   markup <- as.tibble(t(results_vector[[run]]$estimates$markupcat), rownames="est")
+   estimates_results[[run]] <- bind_rows(country_ethno, country, ethno, markup) %>% mutate(run = run)
+   rm(country_ethno,country,ethno,markup)
+ }
> 
> #combine results 
> results <- bind_rows(estimates_results) %>% separate(est, sep=":", c("main", "conditional") )
Warning message:
Expected 2 pieces. Missing pieces filled with `NA` in 24000 rows [7, 8, 9, 10, 11, 12, 13, 14, 21, 22, 23, 24, 25, 26, 27, 28, 35, 36, 37, 38, ...]. 
> #rm(estimates_results, results_vector)
> 
> # pull out the markup estimates
> markups <- results %>% filter(grepl('markup', main)) %>% 
+   select(main, AMCE, run) %>%
+   rename(markup_est = AMCE, markup = main)
> 
> # pull out estimates for high ethnocentrism  
> high_ethno <- results %>% filter(grepl('country', main) & is.na(conditional)) %>% 
+   mutate(ethno_factor = "High ethnocentrism") %>%
+   select(`Conditional Estimate`, ethno_factor, run, main) %>%
+   rename(estimate = `Conditional Estimate` )
> 
> # pull out estimates for low ethnocentrism 
> low_ethno <- results %>% 
+   filter(grepl('country', main) & (is.na(conditional) | conditional == "ethnofactorLowethnocentrism") ) %>%
+   mutate(ethno_factor = "Low ethnocentrism") %>%
+   group_by(run, main, ethno_factor) %>%
+   summarize( estimate =  sum (`Conditional Estimate`))  %>%
+   ungroup()
`summarise()` has grouped output by 'run', 'main'. You can override using the `.groups` argument.
>   
> 
> 
> 
> ## Binned ethnocentrism results, disaggregated by markup
> full_results_disag <- bind_rows(high_ethno, low_ethno) %>% 
+   left_join(markups, by=c("run")) %>%
+   mutate(markup_num = str_replace(markup, "markupcat", "")) %>%
+   mutate(mwtp = estimate/(markup_est/as.numeric(markup_num))) %>%
+   group_by(main, ethno_factor, markup_num) %>%
+   summarise(mean_amce = mean(estimate), 
+             lower_amce =quantile(estimate, probs=c(.025)), 
+             upper_amce = quantile(estimate, probs=c(.975)), 
+             mean_mwtp = mean(mwtp), 
+             lower_mwtp =quantile(mwtp, probs=c(.025)), 
+             upper_mwtp = quantile(mwtp, probs=c(.975)))
`summarise()` has grouped output by 'main', 'ethno_factor'. You can override using the `.groups` argument.
> 
> 
> mwtp_results_disag <- full_results_disag %>% select(main, mean_mwtp, lower_mwtp, upper_mwtp, ethno_factor, markup_num) %>%
+   rename(mean = mean_mwtp, upper = upper_mwtp, lower=lower_mwtp) %>% mutate(analysis = "MWTP")
> 
> # Combine results that are aggregated by markup 
> binned_results_disag <- bind_rows(mwtp_full_sample_results_disag, mwtp_results_disag) %>%
+   rename(estimate = mean, BY = ethno_factor, statistic = analysis, level = main)
> 
> 
> # prep results for plot 
> for_plot_disag <- binned_results_disag %>%
+   filter(statistic == "MWTP") %>%
+   mutate(level = str_remove(level, "^country")) %>%
+   mutate(level= case_when(level == "AcountryoutsidetheUnitedStates" | level == "A country outside the United States" ~ "Outside the\nUnited States", 
+                           TRUE ~ level)) %>% 
+   mutate(BY = case_when(
+     BY == "Full sample" ~ "Sample-wide average", 
+     TRUE ~ BY )) %>%
+   mutate(BY = factor(BY, levels=c("Sample-wide average", "Low ethnocentrism", "High ethnocentrism"))) %>%
+   mutate(level = factor(level, levels = c("China",  "Outside the\nUnited States", "Germany", "United States"))) %>%
+   mutate(facet_label = paste0("Markup: ", markup_num, "% over baseline")) %>%
+   mutate(facet_label = factor(facet_label, levels = c("Markup: 25% over baseline", "Markup: 50% over baseline", "Markup: 100% over baseline")))
> 
> 
> 
> mtwp_disag_plot <- ggplot(data=for_plot_disag, aes(y=estimate, x=level, group=BY)) +
+   geom_errorbar(aes(ymin=lower, ymax=upper), width=0, position=pd_combined)+
+   geom_point(aes(fill=BY, shape=BY),color="black",size=4,position=pd_combined) + 
+   xlab("Country") +
+   ylab("Estimate") +
+   scale_shape_manual(values=c(22,21, 23, 24))+
+   facet_wrap(facet_label~.) + plot_theme
> mtwp_disag_plot
> 
> 
> ggsave("FigureA5.png", mtwp_disag_plot, dpi=600, width=14, height=7)
> 
> 
> 
> ##############
> #### Figure A7
> ##############
> 
> #### Conditional Logit #### 
> library(survival)
> 
> results_vector = list()
> est_list = list()
> resp_ids <- unique(results_data$id)
> results_data <- results_data %>% group_by(task_num,id) %>% 
+   mutate(strata_id = group_indices()) %>% mutate(country = relevel(country, ref="United States"))
Warning message:
Problem while computing `strata_id = group_indices()`.
ℹ `group_indices()` was deprecated in dplyr 1.0.0. Please use `cur_group_id()` instead.
ℹ The warning occurred in group 1: task_num = 1, id = 4. 
> 
> ### binned estimates
> set.seed(1234)
> for (run in 1:runs) {
+ 
+   samp <- sample(x = resp_ids, size = length(resp_ids), replace = TRUE)
+   bootSample <- as.data.table(results_data)
+   setkey(bootSample, "id")
+   bootSample <- bootSample[J(samp), allow.cartesian = TRUE]
+   results_vector[[run]] <- clogit(profile_chosen~mark_up+country*ethno_factor+rating_cat+strata(strata_id),  data=bootSample, method="approximate")
+   est_list[[run]] <-as.tibble(results_vector[[run]]$coefficients, rownames="coefficient")
+   est_list[[run]]$run <- run
+ }
> 
> clogit_bs_results <- bind_rows(est_list)
> 
> markups <- clogit_bs_results %>% filter(grepl('mark_up', coefficient)) %>% rename(markup_est= value) %>% select(run, markup_est)
> 
> high_ethno <- clogit_bs_results %>% filter(grepl('country', coefficient)) %>% 
+   filter(!(grepl(':ethno_factorMedium ethnocentrism', coefficient))) %>%
+   filter(!(grepl(':ethno_factorLow ethnocentrism', coefficient))) %>%
+   mutate(ethno_factor = "High ethnocentrism") 
> 
> low_ethno <- clogit_bs_results %>% 
+   filter(grepl('country', coefficient)) %>% 
+   filter(!(grepl(':ethno_factorMedium ethnocentrism', coefficient))) %>%
+   separate(coefficient, sep=":", c("coefficient", "conditional")) %>%
+   group_by(run, coefficient) %>%
+   summarize( value =  sum (value)) %>% 
+   mutate(ethno_factor = "Low ethnocentrism") 
`summarise()` has grouped output by 'run'. You can override using the `.groups` argument.
Warning message:
Expected 2 pieces. Missing pieces filled with `NA` in 9000 rows [1, 2, 3, 7, 8, 9, 13, 14, 15, 19, 20, 21, 25, 26, 27, 31, 32, 33, 37, 38, ...]. 
> 
> 
> full_results_clogit <- bind_rows(high_ethno, low_ethno) %>% 
+   left_join(markups, by=c("run")) %>%
+   mutate(mwtp = value/markup_est) %>%
+   group_by(coefficient, ethno_factor) %>%
+   summarise(mean_mwtp = mean(mwtp)*100,
+             lower_mwtp =quantile(mwtp, probs=c(.025))*100, 
+             upper_mwtp = quantile(mwtp, probs=c(.975))*100)
`summarise()` has grouped output by 'coefficient'. You can override using the `.groups` argument.
> 
> 
> # Sample-wide estimates  
> set.seed(1234)
> est_list = list()
> for (run in 1:runs) {
+   samp <- sample(x = resp_ids, size = length(resp_ids), replace = TRUE)
+   bootSample <- as.data.table(results_data)
+   setkey(bootSample, "id")
+   bootSample <- bootSample[J(samp), allow.cartesian = TRUE]
+   results_vector[[run]] <- clogit(profile_chosen~mark_up+country+rating_cat+strata(strata_id),  data=bootSample, method="approximate")
+   est_list[[run]] <-as.tibble(results_vector[[run]]$coefficients, rownames="coefficient")
+   est_list[[run]]$run <- run
+ }
> 
> clogit_bs_results_unconditional <- bind_rows(est_list)
> markups <- clogit_bs_results_unconditional %>% filter(grepl('mark_up', coefficient)) %>% rename(markup_est= value) %>% select(run, markup_est)
> 
> unconditional_clogit <-  clogit_bs_results_unconditional %>%  filter(grepl('country', coefficient)) %>%
+   left_join(markups, by=c("run")) %>%
+   mutate(mwtp = value/markup_est) %>%
+   group_by(coefficient) %>%
+   summarise(mean_mwtp = mean(mwtp)*100,
+             lower_mwtp =quantile(mwtp, probs=c(.025))*100, 
+             upper_mwtp = quantile(mwtp, probs=c(.975))*100) %>%
+   mutate(ethno_factor = "Sample-wide average") 
> 
> 
> clogit_for_plot <- bind_rows(unconditional_clogit, full_results_clogit  ) %>% 
+   mutate(coefficient = str_replace(coefficient, "country", "")) %>%
+   mutate(ethno_factor = factor(ethno_factor, levels=c("Sample-wide average", "Low ethnocentrism", "High ethnocentrism"))) %>%
+   mutate(coefficient = case_when(coefficient == "A country outside the United States" ~ "Outside the\nUnited States", TRUE ~ coefficient)) %>%
+   mutate(coefficient = factor(coefficient, levels = c("China",  "Outside the\nUnited States", "Germany", "United States"))) 
> 
> ### put together plot
> pd_combined<-position_dodge(.75)
> clogit_mtwp_averages_plot <- ggplot(data=clogit_for_plot, aes(y=mean_mwtp, x=coefficient, group=ethno_factor)) +
+   geom_errorbar(aes(ymin=lower_mwtp, ymax=upper_mwtp), width=0, position=pd_combined)+
+   geom_point(aes(fill=ethno_factor, shape=ethno_factor),color="black",size=4,position=pd_combined) + 
+   xlab("Country") +
+   ylab("Estimate") +
+   scale_shape_manual(values=c(22,21, 23, 24))+ plot_theme
> clogit_mtwp_averages_plot
> 
> 
> ggsave("FigureA7.png", clogit_mtwp_averages_plot, dpi = 300, width=8.5, height=6, units="in")
> 
> 
> 
> 
> 
> #### Table A5
> ## read data fresh
> 
> ##############
> ##With College Education#### 
> ##########
> 
> ##Read in data 
> results_data <- read.dta13("../final_w1w2.dta")  %>% filter(is.na(wave1_only))
Warning message:
In read.dta13("../final_w1w2.dta") : 
   Factor codes of type double or float detected in variables

   educ, pid3_lean, pid3_lean_w2, mark_up_cat

   No labels have been assigned.
   Set option 'nonint.factors = TRUE' to assign labels anyway.

> with(results_data, table(educ))
educ
   1    2    3    4    5 
4700 4520 2260 5720 2720 
> results_data <- results_data %>% 
+   filter(educ >=4)
> nrow(results_data)
[1] 8440
> 
> #format
> results_data<- results_data %>% 
+   mutate(country = factor(country, levels=c("United States", "Germany", "A country outside the United States", "China"))) %>%
+   mutate(mark_up_cat = case_when(
+     mark_up == "1" ~ "100", 
+     mark_up == "0.25" ~ "25", 
+     mark_up == "0.5" ~ "50",
+     mark_up == "0" ~ "0")) %>% 
+   mutate(mark_up_cat =factor(mark_up_cat, levels=c("0", "25", "50", "100")))
> 
> 
> #bin our ethnocentrism measure 
> results_data <- results_data %>%
+   mutate(ethno_factor = case_when(
+     ethnocentrism <= .375 ~ "Low ethnocentrism", 
+     ethnocentrism > .375 & ethnocentrism < .625  ~ "Medium ethnocentrism", 
+     ethnocentrism >= .625  ~ "High ethnocentrism", 
+   )) %>% mutate(ethno_factor = as.factor(ethno_factor))
> 
> 
> data_for_cjoint <- results_data %>%  dplyr::select(profile_chosen, mark_up_cat, rating_cat, country, id, product_cat, ethno_factor) %>%
+   drop_na() %>%
+   mutate(country = relevel(country, ref="United States")) %>%
+   mutate(rating_cat = relevel(rating_cat, ref="5 out of 5 stars"))
> 
> 
> ###############################
> # Estimates by level of ethnocentrism 
> ###############################
> 
> results_vector = list()
> estimates_results = list()
> resp_ids <- unique(data_for_cjoint$id)
> set.seed(1234)
> 
> for (run in 1:runs) {
+   # sample with replacement from a vector as long as our dataset. 
+   samp <- sample(x = resp_ids, size = length(resp_ids), replace = TRUE)
+   
+   #sample
+   bootSample <- as.data.table(data_for_cjoint)
+   setkey(bootSample, "id")
+   bootSample <- bootSample[J(samp), allow.cartesian = TRUE]
+   
+   #store complete results
+   results_vector[[run]] <- cjoint::amce(profile_chosen ~ mark_up_cat + rating_cat + country*ethno_factor, data=bootSample,  respondent.varying = "ethno_factor", respondent.id = "id")
+   
+   #store estimates 
+   country_ethno <-  as.tibble(t(results_vector[[run]]$cond.estimates$`country:ethnofactor`), rownames="est")
+   country <-  as.tibble(t(results_vector[[run]]$cond.estimates$country), rownames="est")
+   ethno <- as.tibble(t(results_vector[[run]]$cond.estimates$ethnofactor), rownames="est")
+   markup <- as.tibble(t(results_vector[[run]]$estimates$markupcat), rownames="est")
+   estimates_results[[run]] <- bind_rows(country_ethno, country, ethno, markup) %>% mutate(run = run)
+   rm(country_ethno,country,ethno,markup)
+ }
> 
> #combine results 
> results <- bind_rows(estimates_results) %>% separate(est, sep=":", c("main", "conditional") )
Warning message:
Expected 2 pieces. Missing pieces filled with `NA` in 24000 rows [7, 8, 9, 10, 11, 12, 13, 14, 21, 22, 23, 24, 25, 26, 27, 28, 35, 36, 37, 38, ...]. 
> 
> # pull out the markup estimates
> markups <- results %>% filter(grepl('markup', main)) %>% 
+   select(main, AMCE, run) %>%
+   rename(markup_est = AMCE, markup = main)
> 
> # pull out estimates for high ethnocentrism  
> high_ethno <- results %>% 
+   filter(grepl('country', main) & is.na(conditional)) %>% 
+   mutate(ethno_factor = "High ethnocentrism") %>%
+   select(`Conditional Estimate`, ethno_factor, run, main) %>%
+   rename(estimate = `Conditional Estimate` )
> 
> # pull out estimates for medium ethnocentrism  
> medium_ethno <- results %>% 
+   filter(grepl('country', main) & (is.na(conditional) | conditional == "ethnofactorMediumethnocentrism") ) %>%
+   mutate(ethno_factor = "Medium ethnocentrism") %>%
+   group_by(run, main, ethno_factor) %>%
+   summarize( estimate =  sum (`Conditional Estimate`))  %>%
+   ungroup()
`summarise()` has grouped output by 'run', 'main'. You can override using the `.groups` argument.
> 
> # pull out estimates for low ethnocentrism 
> low_ethno <- results %>% 
+   filter(grepl('country', main) & (is.na(conditional) | conditional == "ethnofactorLowethnocentrism") ) %>%
+   mutate(ethno_factor = "Low ethnocentrism") %>%
+   group_by(run, main, ethno_factor) %>%
+   summarize( estimate =  sum (`Conditional Estimate`))  %>%
+   ungroup()
`summarise()` has grouped output by 'run', 'main'. You can override using the `.groups` argument.
> 
> ## Binned ethnocentrism results, averaging over the markups
> full_results <- bind_rows(high_ethno, medium_ethno, low_ethno) %>% 
+   left_join(markups, by=c("run")) %>%
+   mutate(markup_num = str_replace(markup, "markupcat", "")) %>%
+   mutate(mwtp = estimate/(markup_est/as.numeric(markup_num))) %>%
+   group_by(main, ethno_factor) %>%
+   summarise(mean_amce = mean(estimate), 
+             lower_amce =quantile(estimate, probs=c(.025)), 
+             upper_amce = quantile(estimate, probs=c(.975)), 
+             mean_mwtp = mean(mwtp), 
+             lower_mwtp =quantile(mwtp, probs=c(.025)), 
+             upper_mwtp = quantile(mwtp, probs=c(.975)))
`summarise()` has grouped output by 'main'. You can override using the `.groups` argument.
> 
> full_results_college<-full_results
> 
> mwtp_results_college <- full_results_college %>% select(main, mean_mwtp, lower_mwtp, upper_mwtp, ethno_factor ) %>%
+   rename(mean = mean_mwtp, upper = upper_mwtp, lower=lower_mwtp) %>% mutate(analysis = "MWTP")
> mwtp_results_college
# A tibble: 9 × 6
# Groups:   main [3]
  main                                   mean lower upper ethno_factor         analysis
  <chr>                                 <dbl> <dbl> <dbl> <chr>                <chr>   
1 countryAcountryoutsidetheUnitedStates  42.3 24.4   64.9 High ethnocentrism   MWTP    
2 countryAcountryoutsidetheUnitedStates  29.7 15.3   48.5 Low ethnocentrism    MWTP    
3 countryAcountryoutsidetheUnitedStates  39.4 22.8   61.2 Medium ethnocentrism MWTP    
4 countryChina                           96.7 64.1  135.  High ethnocentrism   MWTP    
5 countryChina                           57.4 34.6   85.2 Low ethnocentrism    MWTP    
6 countryChina                           74.4 48.6  105.  Medium ethnocentrism MWTP    
7 countryGermany                         41.7 24.8   63.3 High ethnocentrism   MWTP    
8 countryGermany                         15.5  2.78  31.2 Low ethnocentrism    MWTP    
9 countryGermany                         22.7 10.1   39.1 Medium ethnocentrism MWTP    
> 
> ##Without College Education####
> 
> 
> ##Read in data (assuming WD set to same directory as data)
> results_data <- read.dta13("../final_w1w2.dta")  %>% filter(is.na(wave1_only))
Warning message:
In read.dta13("../final_w1w2.dta") : 
   Factor codes of type double or float detected in variables

   educ, pid3_lean, pid3_lean_w2, mark_up_cat

   No labels have been assigned.
   Set option 'nonint.factors = TRUE' to assign labels anyway.

> with(results_data, table(educ)) 
educ
   1    2    3    4    5 
4700 4520 2260 5720 2720 
> results_data <- results_data %>% 
+   filter(educ <4)
> nrow(results_data)
[1] 11480
> 
> #format
> results_data<- results_data %>% 
+   mutate(country = factor(country, levels=c("United States", "Germany", "A country outside the United States", "China"))) %>%
+   mutate(mark_up_cat = case_when(
+     mark_up == "1" ~ "100", 
+     mark_up == "0.25" ~ "25", 
+     mark_up == "0.5" ~ "50",
+     mark_up == "0" ~ "0")) %>% 
+   mutate(mark_up_cat =factor(mark_up_cat, levels=c("0", "25", "50", "100")))
> 
> 
> #bin our ethnocentrism measure 
> results_data <- results_data %>%
+   mutate(ethno_factor = case_when(
+     ethnocentrism <= .375 ~ "Low ethnocentrism", 
+     ethnocentrism > .375 & ethnocentrism < .625  ~ "Medium ethnocentrism", 
+     ethnocentrism >= .625  ~ "High ethnocentrism", 
+   )) %>% mutate(ethno_factor = as.factor(ethno_factor))
> 
> data_for_cjoint <- results_data %>%  dplyr::select(profile_chosen, mark_up_cat, rating_cat, country, id, product_cat, ethno_factor) %>%
+   drop_na() %>%
+   mutate(country = relevel(country, ref="United States")) %>%
+   mutate(rating_cat = relevel(rating_cat, ref="5 out of 5 stars"))
> 
> ###############################
> # Estimates by level of ethnocentrism 
> ###############################
> 
> results_vector = list()
> estimates_results = list()
> resp_ids <- unique(data_for_cjoint$id)
> set.seed(1234)
> 
> for (run in 1:runs) {
+   # sample with replacement from a vector as long as our dataset. 
+   samp <- sample(x = resp_ids, size = length(resp_ids), replace = TRUE)
+   
+   #sample
+   bootSample <- as.data.table(data_for_cjoint)
+   setkey(bootSample, "id")
+   bootSample <- bootSample[J(samp), allow.cartesian = TRUE]
+   
+   #store complete results
+   results_vector[[run]] <- cjoint::amce(profile_chosen ~ mark_up_cat + rating_cat + country*ethno_factor, data=bootSample,  respondent.varying = "ethno_factor", respondent.id = "id")
+   
+   #store estimates 
+   country_ethno <-  as.tibble(t(results_vector[[run]]$cond.estimates$`country:ethnofactor`), rownames="est")
+   country <-  as.tibble(t(results_vector[[run]]$cond.estimates$country), rownames="est")
+   ethno <- as.tibble(t(results_vector[[run]]$cond.estimates$ethnofactor), rownames="est")
+   markup <- as.tibble(t(results_vector[[run]]$estimates$markupcat), rownames="est")
+   estimates_results[[run]] <- bind_rows(country_ethno, country, ethno, markup) %>% mutate(run = run)
+   rm(country_ethno,country,ethno,markup)
+ }
> 
> #combine results 
> results <- bind_rows(estimates_results) %>% separate(est, sep=":", c("main", "conditional") )
Warning message:
Expected 2 pieces. Missing pieces filled with `NA` in 24000 rows [7, 8, 9, 10, 11, 12, 13, 14, 21, 22, 23, 24, 25, 26, 27, 28, 35, 36, 37, 38, ...]. 
> 
> # pull out the markup estimates
> markups <- results %>% filter(grepl('markup', main)) %>% 
+   select(main, AMCE, run) %>%
+   rename(markup_est = AMCE, markup = main)
> 
> # pull out estimates for high ethnocentrism  
> high_ethno <- results %>% 
+   filter(grepl('country', main) & is.na(conditional)) %>% 
+   mutate(ethno_factor = "High ethnocentrism") %>%
+   select(`Conditional Estimate`, ethno_factor, run, main) %>%
+   rename(estimate = `Conditional Estimate` )
> 
> # pull out estimates for medium ethnocentrism  
> medium_ethno <- results %>% 
+   filter(grepl('country', main) & (is.na(conditional) | conditional == "ethnofactorMediumethnocentrism") ) %>%
+   mutate(ethno_factor = "Medium ethnocentrism") %>%
+   group_by(run, main, ethno_factor) %>%
+   summarize( estimate =  sum (`Conditional Estimate`))  %>%
+   ungroup()
`summarise()` has grouped output by 'run', 'main'. You can override using the `.groups` argument.
> 
> # pull out estimates for low ethnocentrism 
> low_ethno <- results %>% 
+   filter(grepl('country', main) & (is.na(conditional) | conditional == "ethnofactorLowethnocentrism") ) %>%
+   mutate(ethno_factor = "Low ethnocentrism") %>%
+   group_by(run, main, ethno_factor) %>%
+   summarize( estimate =  sum (`Conditional Estimate`))  %>%
+   ungroup()
`summarise()` has grouped output by 'run', 'main'. You can override using the `.groups` argument.
> 
> ## Binned ethnocentrism results, averaging over the markups
> full_results_nocollege <- bind_rows(high_ethno, medium_ethno, low_ethno) %>% 
+   left_join(markups, by=c("run")) %>%
+   mutate(markup_num = str_replace(markup, "markupcat", "")) %>%
+   mutate(mwtp = estimate/(markup_est/as.numeric(markup_num))) %>%
+   group_by(main, ethno_factor) %>%
+   summarise(mean_amce = mean(estimate), 
+             lower_amce =quantile(estimate, probs=c(.025)), 
+             upper_amce = quantile(estimate, probs=c(.975)), 
+             mean_mwtp = mean(mwtp), 
+             lower_mwtp =quantile(mwtp, probs=c(.025)), 
+             upper_mwtp = quantile(mwtp, probs=c(.975)))
`summarise()` has grouped output by 'main'. You can override using the `.groups` argument.
> 
> full_results_nocollege<-full_results_nocollege
> 
> 
> mwtp_results_nocollege <- full_results_nocollege %>% select(main, mean_mwtp, lower_mwtp, upper_mwtp, ethno_factor ) %>%
+   rename(mean = mean_mwtp, upper = upper_mwtp, lower=lower_mwtp) %>% mutate(analysis = "MWTP")
> 
> 
> mwtp_results_college <- mwtp_results_college %>% mutate(aware = "High awareness")
> mwtp_results_nocollege <- mwtp_results_nocollege %>% mutate(aware = "Low awareness")
> 
> mwtip_by_aware <- rbind(mwtp_results_college, mwtp_results_nocollege) %>% 
+   mutate(main = str_remove(main, "country")) %>%
+   mutate(main = case_when(main== "AcountryoutsidetheUnitedStates" ~ "Outside U.S.", 
+                            TRUE ~ main)) %>%
+   mutate(mwtp_w_ci = paste0 (round(mean,2), " (", round(lower,2), ", ", round(upper,2), ")" )) %>%
+   select(main, ethno_factor, aware,mwtp_w_ci ) %>%
+   mutate(main = factor(main, levels = c("China", "Germany",  "Outside U.S."))) %>%
+   mutate(ethno_factor = factor(ethno_factor, levels = c("Low ethnocentrism", "Medium ethnocentrism", "High ethnocentrism"))) %>%
+   mutate(aware = factor(aware, levels = c("Low awareness", "High awareness"))) %>% arrange(aware) %>%
+   pivot_wider(names_from=aware, values_from=mwtp_w_ci ) %>% arrange(main, ethno_factor) %>%
+   rename(Country = main, Ethnocentrism = ethno_factor) %>%
+   qable()
> 
> #Output table A5
> TableA5File<-file("TableA5.txt")
> writeLines(mwtip_by_aware, TableA5File)
> close(TableA5File)
> 
> 
> ##TableA6 
> 
> ft_data <- read.dta13("../final_w1w2.dta") %>% filter(is.na(wave1_only)) %>%
+   select(id, ft_mexico, ft_china, ft_UK, ft_germ, ft_japan, ft_PR, ft_us) %>%
+   group_by(id) %>%
+   pivot_longer(cols =starts_with("ft_"), names_to="country") %>%
+   na.omit() %>%
+   mutate(country = case_when(country == "ft_mexico" ~ "Mexico",
+                              country == "ft_UK" ~ "United Kingdom",
+                              country == "ft_us" ~ "United States",
+                              country == "ft_germ" ~ "Germany",
+                              country == "ft_japan" ~ "Japan",
+                              country == "ft_china" ~ "China",
+                              country == "ft_PR" ~ "Puerto Rico")) %>%
+   group_by(country) %>%
+   summarize(ft_mean = mean(value), ft_sd = sd(value)) %>%
+   mutate(`Mean (SD)` = paste0(round(ft_mean, 2), " (", round(ft_sd, 2), ")")) %>%
+   rename(Country = country) %>%
+   select(Country, `Mean (SD)`) %>% qable()
Warning message:
In read.dta13("../final_w1w2.dta") : 
   Factor codes of type double or float detected in variables

   educ, pid3_lean, pid3_lean_w2, mark_up_cat

   No labels have been assigned.
   Set option 'nonint.factors = TRUE' to assign labels anyway.

> 
> 
> #Output table A6
> TableA6File<-file("TableA6.txt")
> writeLines(ft_data, TableA6File)
> close(TableA6File)
> 